/*
 *
 * Probe's multiple link eliminator for Matt's free for all links page
 * Version 1.0
 * Usama Wazeer (usamaw@cs.utexas.edu)
 * URL: http://www.cs.utexas.edu/users/usamaw
 *
 * # Define the Output and Input files below..
 *   DONT define the same file for both this will
 *   delete your file and you will lose all data.
 * # Compile this using your favorite C compiler..
 *   ex: gcc -o urlchk urlchk.c
 * # and then just run: urlchk
 *
 * The program will display the line number and entry for each
 * link that is repeated and create the output file.
 *
 * Feel free to copy or change this program in any way,
 * as long as you give me credit. :)
 *
 */

/******** DEFINE THESE TWO VARIABLES ********/
#define INPUT_FILE "/u/usamaw/www/links.html"
#define OUTPUT_FILE "/u/usamaw/www/links.html.out"
/********************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h> /* strcasecmp() (POSIX) */

/* Longest URL token that is stored, not counting the terminating NUL. */
#define URL_MAX_LEN 199
#define URL_STR2(x) #x
#define URL_STR(x) URL_STR2(x)

/* Largest chunk readln_from_a_file() stores per call, including the NUL. */
#define LINE_CHUNK 1000

/*
 * NOTE(review): the sscanf() format string of the original program was
 * destroyed when the source was extracted from HTML (the markup inside the
 * string literal was rendered instead of kept).  It is reconstructed here
 * on the assumption that a link entry looks like
 *     <li><a href="http://host/page">Title</a>
 * so the scanned token runs from just after "href=" to the first blank and
 * checkurl() then cuts it at the first '>'.
 * TODO: confirm against a real links.html before trusting the output.
 */
#define ENTRY_FORMAT "<li><a href=%" URL_STR(URL_MAX_LEN) "s"

/* One remembered URL; nodes form a singly linked list headed by URLList. */
typedef struct URLType {
    char url[URL_MAX_LEN + 1];
    struct URLType *next;
} URLStruct;

/* Head of the list of URLs seen so far (NULL when the list is empty). */
URLStruct *URLList;

/*
 * Return the initial (empty) list head.
 *
 * The empty list is represented by NULL, so nothing is allocated.
 */
URLStruct *init_urllist(void)
{
    return NULL;
}

/* Release every node of URLList and leave the list empty. */
static void free_urllist(void)
{
    URLStruct *node = URLList;

    while (node != NULL) {
        /* Fetch the successor before the node is released. */
        URLStruct *next = node->next;

        free(node);
        node = next;
    }
    URLList = NULL;
}

/*
 * Look up a URL in URLList, ignoring letter case.
 *
 * userhost: NUL-terminated URL to search for; may be NULL.
 * Returns the matching node, or NULL if userhost is NULL or not present.
 */
URLStruct *find_url(char *userhost)
{
    URLStruct *node;

    if (userhost == NULL) {
        return NULL;
    }
    for (node = URLList; node != NULL; node = node->next) {
        if (strcasecmp(node->url, userhost) == 0) {
            return node;
        }
    }
    return NULL;
}

/*
 * Read the next line that does not start with '#' from stream into lin.
 *
 * lin must have room for at least LINE_CHUNK bytes.  The line is cut at
 * the first '\r' or '\n'.  Lines starting with '#' are skipped entirely.
 * A physical line longer than LINE_CHUNK - 1 characters is delivered in
 * several pieces, one per call.
 *
 * Returns 1 when a line was stored, 0 on end of file or read error.
 */
int readln_from_a_file(FILE *stream, char *lin)
{
    char *p;

    do {
        p = fgets(lin, LINE_CHUNK, stream);
    } while (p != NULL && *lin == '#');

    if (p == NULL) {
        return 0;
    }
    lin[strcspn(lin, "\r\n")] = '\0';
    return 1;
}

/*
 * Return the stored copy of userhost from URLList, or NULL when it has
 * not been seen yet (or userhost is NULL).
 */
char *furl(char *userhost)
{
    URLStruct *node = find_url(userhost);

    return node != NULL ? node->url : NULL;
}

/*
 * Prepend url to URLList unless it is already present.
 *
 * Returns 1 when a new node was added.  Returns 0 when url is NULL,
 * already in the list, too long to be stored without truncation, or when
 * memory could not be allocated.
 */
int add_to_urllist(char *url)
{
    URLStruct *node;
    size_t len;

    if (url == NULL || find_url(url) != NULL) {
        return 0;
    }

    /*
     * Refuse rather than truncate: a truncated copy could never compare
     * equal to the full URL in find_url().
     */
    len = strlen(url);
    if (len > URL_MAX_LEN) {
        return 0;
    }

    node = malloc(sizeof *node);
    if (node == NULL) {
        return 0;
    }
    memcpy(node->url, url, len + 1);
    node->next = URLList;
    URLList = node;
    return 1;
}

/*
 * Decide whether the entry whose scanned token is url2 should be kept.
 *
 * url2 is modified in place: leading '>' characters are skipped and the
 * token is cut at the next '>'.  The resulting URL is remembered the
 * first time it is seen.
 *
 * Returns 0 when the URL was already seen (duplicate entry), 1 otherwise,
 * including when url2 is NULL or contains no URL text at all.
 */
int checkurl(char *url2)
{
    char *url;

    if (url2 == NULL) {
        return 1;
    }

    url = url2 + strspn(url2, ">");
    url[strcspn(url, ">")] = '\0';
    if (*url == '\0') {
        /* Nothing to compare against, so the entry cannot be a duplicate. */
        return 1;
    }

    if (furl(url) != NULL) {
        return 0;
    }
    add_to_urllist(url);
    return 1;
}

/*
 * Copy filename to filename2, dropping link entries whose URL already
 * appeared earlier in the file.
 *
 * Every dropped entry is reported on stdout with its line number (counted
 * over the lines returned by readln_from_a_file()).  Lines that do not
 * look like link entries are copied unchanged.  Any previous contents of
 * URLList are discarded first.
 *
 * Returns 1 on success, 0 when a name is NULL, both names are the same
 * string, a file cannot be opened, or a read/write error occurred.
 */
int read_urllist(char *filename, char *filename2)
{
    FILE *in;
    FILE *out;
    char lin[LINE_CHUNK];
    char url2[URL_MAX_LEN + 1];
    int line_no = 0;
    int ok;

    if (filename == NULL || filename2 == NULL) {
        return 0;
    }
    /* Opening the output truncates it; never do that to the input. */
    if (strcmp(filename, filename2) == 0) {
        return 0;
    }

    in = fopen(filename, "r");
    if (in == NULL) {
        return 0;
    }
    out = fopen(filename2, "w");
    if (out == NULL) {
        fclose(in);
        return 0;
    }

    free_urllist();
    URLList = init_urllist();

    while (readln_from_a_file(in, lin)) {
        line_no++;

        url2[0] = '\0';
        if (sscanf(lin, ENTRY_FORMAT, url2) != 1) {
            url2[0] = '\0';
        }

        if (url2[0] == '\0' || checkurl(url2)) {
            fprintf(out, "%s\n", lin);
        } else {
            printf("%-4i Entry: %s \n", line_no, lin);
        }
    }

    ok = !ferror(in) && !ferror(out);
    fclose(in);
    /* fclose() flushes, so a failure here means the output is incomplete. */
    if (fclose(out) != 0) {
        ok = 0;
    }
    return ok;
}

/* Run the duplicate eliminator on INPUT_FILE, writing OUTPUT_FILE. */
int main(void)
{
    int ok;

    printf("Here we go.... \n\n");

    ok = read_urllist(INPUT_FILE, OUTPUT_FILE);
    free_urllist();

    if (!ok) {
        fprintf(stderr, "urlchk: could not process %s into %s\n",
                INPUT_FILE, OUTPUT_FILE);
        return EXIT_FAILURE;
    }

    printf("\n\nAll done!!!!!\n");
    return EXIT_SUCCESS;
}